#################### PREP FRISP DATA FOR CREATION OF TABLES 1, 2, 3, AND 4 #################

  rm(list = ls())
  library(foreign)
  library(gam)
  library(Hmisc)
  #set pathway
  data1 <- read.spss("FFRISPWave4_JAN_09.sav")
  attach(data1)

  # create smoking status variables 
  weights <- finalwgt/mean(finalwgt, na.rm=TRUE)
  neversmoker <- as.numeric(Q52=="No")
  currentsmoker <- as.numeric(as.numeric(Q52B)<3)
  currentsmoker[neversmoker==1] <- 0
  formersmoker <- as.numeric(as.numeric(Q52B)==3)
  formersmoker[neversmoker==1] <- 0
  currentvsformer <- currentsmoker
  currentvsformer[neversmoker==1] <- NA
  currentvsnever <- currentsmoker
  currentvsnever[formersmoker==1] <- NA
  smokestatus <- neversmoker+2*formersmoker+3*currentsmoker

  # expected cancer rates for smokers and non-smokers 
  table(SMOKE)
  summary(Q108A) #Non-Smoker 1
  summary(Q108B) #Smoker 1
  
  nonsmokercancer <- Q108A
  nonsmokercancernofollow <- Q108A
  smokercancer <- Q108B
  smokercancernofollow <- Q108B
  
  nonsmokercancer[SMOKE=="One" & !is.na(Q110)==TRUE] <- Q110[SMOKE=="One" & !is.na(Q110)==TRUE]
  smokercancer[SMOKE=="Two" & !is.na(Q110)==TRUE] <- Q110[SMOKE=="Two" & !is.na(Q110)==TRUE]
  
  summary(Q111A) #Non-Smoker 2
  summary(Q111B) #Smoker 2
  
  nonsmokercancer[SMOKE=="Two" & !is.na(SMOKE)==TRUE] <- Q111A[SMOKE=="Two" & !is.na(SMOKE)==TRUE]
  nonsmokercancernofollow[SMOKE=="Two" & !is.na(SMOKE)==TRUE] <- Q111A[SMOKE=="Two" & !is.na(SMOKE)==TRUE]
  
  smokercancer[SMOKE=="One" & !is.na(SMOKE)==TRUE] <- Q111B[SMOKE=="One" & !is.na(SMOKE)==TRUE]
  smokercancernofollow[SMOKE=="One" & !is.na(SMOKE)==TRUE] <- Q111B[SMOKE=="One" & !is.na(SMOKE)==TRUE]
  
  nonsmokercancer[SMOKE=="Two" & !is.na(Q113)==TRUE] <- Q113[SMOKE=="Two" & !is.na(Q113)==TRUE]
  smokercancer[SMOKE=="One" & !is.na(Q113)==TRUE] <- Q113[SMOKE=="One" & !is.na(Q113)==TRUE]
  
  wtd.table(nonsmokercancernofollow, weights)$sum.of.weights/sum(wtd.table(nonsmokercancernofollow, weights)$sum.of.weights) #find % who said 500 initially
  wtd.table(smokercancernofollow, weights)$sum.of.weights/sum(wtd.table(smokercancernofollow, weights)$sum.of.weights) #find % who said 500 initially
  
  (sum(weights[SMOKE=="One" & !is.na(Q109) & Q109=="Because I don't know"])+sum(weights[SMOKE=="Two" & !is.na(Q112) & Q112=="Because I don't know"]))/sum(wtd.table(smokercancernofollow, weights)$sum.of.weights) # % who said 500 because they do not know
  (sum(weights[SMOKE=="Two" & !is.na(Q109)  & Q109=="Because I don't know"])+sum(weights[SMOKE=="One" & !is.na(Q112)  & Q112=="Because I don't know"]))/sum(wtd.table(nonsmokercancernofollow, weights)$sum.of.weights) # % who said 500 because they do not know

  # weighted t test to check if order of questions is realted to results 
  order1 <- glm(smokercancer~SMOKE, weight=weights)
  order2 <- glm(nonsmokercancer~SMOKE, weight=weights)

  # crete variable for attributable risk 
  smokerdif <- smokercancer-nonsmokercancer
  smokerdifnofollow <- smokercancernofollow-nonsmokercancernofollow
  smokerdifnoneg <- smokerdif
  smokerdifnoneg[smokerdif<0 & !is.na(smokerdif)] <- 0

  # create variable for relative risk
  rateincrease <- (smokercancer+1)/(nonsmokercancer+1)
  rateincreasenofollow <- (smokercancernofollow+1)/(nonsmokercancernofollow+1)
  rateincreasenoneg <- rateincrease
  rateincreasenoneg[smokerdif<0 & !is.na(smokerdif)] <- 1
  
  rateincrease2 <- rateincrease
  rateincrease2[nonsmokercancer==0 & !is.na(nonsmokercancer)] <- smokercancer[nonsmokercancer==0 & !is.na(nonsmokercancer)]/1
  rateincrease2[smokercancer==0] <- NA
  
  sr <- log(rateincrease)
  srnf <- log(rateincreasenofollow)
  srnn <- log(rateincreasenoneg)

  # create data frame for predicted values 
  varset <- data.frame(smokerdif, rateincrease, sr, nonsmokercancer, smokercancer)

  # mark cases where incorrect question was asked on certainty (N=3)
  findproblems <- !is.na(Q114B) & smokerdif>=0
  findproblems2 <- !is.na(Q114A) & smokerdif!=0
  prob <- findproblems+findproblems2
  
  # misc recoding
  questionorder <- as.numeric(SMOKE)
  
  #set weights to mean to 1
  weights <- finalwgt/mean(finalwgt, na.rm=TRUE)

  # filter to drop cases with missing data or relative risks of < -450 
  filter <- !is.na(sr) & smokerdif>=-450 #& smokercancer!=500 & nonsmokercancer!=500
  filter2 <- !is.na(sr) & smokercancer!=500 & nonsmokercancer!=500
  certfilter <- filter*(as.numeric(prob==0))

  cvffilt <- filter*!is.na(currentvsformer)
  cvnfilt <- filter*!is.na(currentvsnever)

  #################### PRODUCE TABLE 4 PART 1: Predicting Quitting #####################
  
  nfor1 <- sum(weights[!is.na(currentvsformer) & !is.na(smokercancer) & !is.na(sr) & smokerdif>=-450])
  
  smoking.gam1 <- gam(currentvsformer ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam1x <- gam(currentvsformer ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam1y <- gam(currentvsformer ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  varsetrr1 <- varset
  varsetrr1$sr <- wtd.quantile(varset$sr[cvffilt==1], weights[cvffilt==1], .25, na.rm=TRUE) #Use this without a filter for current versus former in other datasets
  varsetrr2 <- varset
  varsetrr2$sr <- wtd.quantile(varset$sr[cvffilt==1], weights[cvffilt==1], .75, na.rm=TRUE)

#Relative vs. Absolute (Relative Coef)
  mean(predict(smoking.gam1, newdata=varsetrr1, type="response") - predict(smoking.gam1, newdata=varsetrr2, type="response"), na.rm=T)
  
  varsetar1 <- varset
  varsetar1$smokercancer <- wtd.quantile(varset$smokercancer[cvffilt==1], weights[cvffilt==1], .25, na.rm=TRUE)
  varsetar2 <- varset
  varsetar2$smokercancer <- wtd.quantile(varset$smokercancer[cvffilt==1], weights[cvffilt==1], .75, na.rm=TRUE)
 
#Relative vs. Absolute (Absolute Coef)
  mean(predict(smoking.gam1, newdata=varsetar1, type="response") - predict(smoking.gam1, newdata=varsetar2, type="response"), na.rm=T)
  
  varsetat1 <- varset
  varsetat1$smokerdif <- wtd.quantile(varset$smokerdif[cvffilt==1], weights[cvffilt==1], .25, na.rm=TRUE)
  varsetat2 <- varset
  varsetat2$smokerdif <- wtd.quantile(varset$smokerdif[cvffilt==1], weights[cvffilt==1], .75, na.rm=TRUE)

#LR Test Relative vs. Absolute    
  smoking.gam11 <- gam(currentvsformer ~ s(smokercancer), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam12 <- gam(currentvsformer ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)

  anova(smoking.gam1, smoking.gam11)
  anova(smoking.gam1, smoking.gam12)
  
  nfor1a <- sum(weights[!is.na(currentvsformer) & !is.na(smokerdif) & !is.na(sr)])
 
#LR Test Relative vs. Attributable 
  smoking.gam1a <- gam(currentvsformer ~ s(smokerdif) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam1a1 <- gam(currentvsformer ~ s(smokerdif), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam1a2 <- gam(currentvsformer ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)
 
  anova(smoking.gam1a, smoking.gam1a1)
  anova(smoking.gam1a, smoking.gam1a2)
  
# Relative vs. Attibutable (Relative Coef)  
  mean(predict(smoking.gam1a, newdata=varsetrr1, type="response") - predict(smoking.gam1a, newdata=varsetrr2, type="response"), na.rm=T)

# Relative vs. Attributable (Attributable Coef)
  mean(predict(smoking.gam1a, newdata=varsetat1, type="response") - predict(smoking.gam1a, newdata=varsetat2, type="response"), na.rm=T)

  # create certainty measures 
  certainty <- Q114A
  c1 <- rep(NA, length(Q114A))
  c1[!is.na(Q114A)] <- 1
  certainty[is.na(Q114A)] <- Q114B[is.na(Q114A)]
  c1[!is.na(Q114B)] <- 2
  certainty[!is.na(Q114C)] <- Q114C[!is.na(Q114C)]
  c1[!is.na(Q114C)] <- 3
  
  cert <- (1-as.numeric(certainty))
  highcertainty <- as.numeric(certainty=="Extremely certain")
  sum(weights[highcertainty==1], na.rm=TRUE)/sum(weights[!is.na(certainty)], na.rm=TRUE)  #proportion high certainty
  
  lowcertainty <- as.numeric(certainty=="Slightly certain")
  lowcertainty[certainty=="Not certain at all"] <- 1
  lowcertainty[certainty=="Moderately certain"] <- 1
  lowcertainty[certainty=="Very certain"] <- 1
  
  extremeandvery <- highcertainty+as.numeric(certainty=="Very certain")
  modandless <- lowcertainty-as.numeric(certainty=="Very certain")
  
  sum(weights[lowcertainty==1], na.rm=TRUE)/sum(weights[!is.na(certainty)], na.rm=TRUE)
  
  extremecertain <- as.numeric(certainty=="Extremely certain")
  top2certain <- as.numeric(certainty=="Extremely certain")+as.numeric(certainty=="Very certain")

  # filter to drop cases with missing data or relative rsisks of < -450
  filter <- !is.na(sr) & smokerdif>=-450 #& smokercancer!=500 & nonsmokercancer!=500
  filter2 <- !is.na(sr) & smokercancer!=500 & nonsmokercancer!=500
  certfilter <- filter*(as.numeric(prob==0))

  # create data frame for predicted values 
  varset <- data.frame(smokerdif, rateincrease, sr, nonsmokercancer, smokercancer, srnn)

  # function to test certainty 
  certfunc <- function(outcome, relativerisk, certaintymeasure, filt){
    varset <- data.frame(relativerisk, questionorder)
    certain.gam1 <- gam(outcome ~ s(relativerisk), subset = filt==1 & certaintymeasure==1, family=gaussian(), weights=weights)
    certain.gam1x <- gam(outcome ~ 1, subset = filt==1 & certaintymeasure==1, family=gaussian(), weights=weights)
    varsetrr1 <- varset
    varsetrr1$relativerisk <- wtd.quantile(varset$relativerisk[filt==1], weights[filt==1], .25, na.rm=TRUE)
    varsetrr2 <- varset
    varsetrr2$relativerisk <- wtd.quantile(varset$relativerisk[filt==1], weights[filt==1], .75, na.rm=TRUE)
    difcert <- mean(predict(certain.gam1, newdata=varsetrr1, type="response") - predict(certain.gam1, newdata=varsetrr2, type="response"), na.rm=T)
    pcert <- anova(certain.gam1, certain.gam1x, test="Chisq")$P[2]
    uncertain.gam1 <- gam(outcome ~ s(relativerisk), subset = filt==1 & certaintymeasure==0, family=gaussian(), weights=weights)
    uncertain.gam1x <- gam(outcome ~ 1, subset = filt==1 & certaintymeasure==0, family=gaussian(), weights=weights)
    difuncert <- mean(predict(uncertain.gam1, newdata=varsetrr1, type="response") - predict(uncertain.gam1, newdata=varsetrr2, type="response"), na.rm=T)
    puncert <- anova(uncertain.gam1, uncertain.gam1x, test="Chisq")$P[2]
    out <- cbind(difcert, pcert, difuncert, puncert)
    out
  }

  certaincvf <- certfunc(currentvsformer, sr, extremecertain, cvffilt)
  certaincvf

  library(mgcv)
    interact.gam1 <- gam(currentvsformer ~ s(sr, by=extremecertain)+s(sr, by=(1-extremecertain)), subset = certfilter==1, data=varset, family=gaussian(), weights)
    interact.gam2 <- gam(currentvsformer ~ s(sr), subset = certfilter==1, data=varset, family=gaussian(), weights)
    anova(interact.gam1, interact.gam2, test="Chisq")
  detach("package:mgcv")
  library(gam)

#################### PRODUCE TABLE 4 PART 2: Predicting Desire to Quit #####################
    
  wantstostop <- 2-as.numeric(Q115)
  stopfilt <- filter*!is.na(wantstostop)
  
  varsetrr1 <- varset
  varsetrr1$sr <- wtd.quantile(varset$sr[stopfilt==1], weights[stopfilt==1], .25, na.rm=TRUE)
  varsetrr2 <- varset
  varsetrr2$sr <- wtd.quantile(varset$sr[stopfilt==1], weights[stopfilt==1], .75, na.rm=TRUE)
  
  varsetar1 <- varset
  varsetar1$smokercancer <- wtd.quantile(varset$smokercancer[stopfilt==1], weights[stopfilt==1], .25, na.rm=TRUE)
  varsetar2 <- varset
  varsetar2$smokercancer <- wtd.quantile(varset$smokercancer[stopfilt==1], weights[stopfilt==1], .75, na.rm=TRUE)
  
  varsetat1 <- varset
  varsetat1$smokerdif <- wtd.quantile(varset$smokerdif[stopfilt==1], weights[stopfilt==1], .25, na.rm=TRUE)
  varsetat2 <- varset
  varsetat2$smokerdif <- wtd.quantile(varset$smokerdif[stopfilt==1], weights[stopfilt==1], .75, na.rm=TRUE)

#LR Test Relative vs. Attributable    
  stop.gam1a <- gam(wantstostop ~ s(smokerdif) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  stop.gam1a1 <- gam(wantstostop ~ s(smokerdif), subset = filter==1, family=gaussian(), weights=weights)
  stop.gam1a2 <- gam(wantstostop ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  anova(stop.gam1a, stop.gam1a1, test="Chisq")
  anova(stop.gam1a, stop.gam1a2, test="Chisq")
  
# Relative vs Attributable (Relative Coef)
  mean(predict(stop.gam1a, newdata=varsetrr2, type="response") - predict(stop.gam1a, newdata=varsetrr1, type="response"), na.rm=T)
  
# Relative vs Attributable (Attributable Coef)
  mean(predict(stop.gam1a, newdata=varsetat2, type="response") - predict(stop.gam1a, newdata=varsetat1, type="response"), na.rm=T)

#LR Test Relative vs. Absolue    
  stop.gam1ax <- gam(wantstostop ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  stop.gam1ax1 <- gam(wantstostop ~ s(smokercancer), subset = filter==1, family=gaussian(), weights=weights)
  stop.gam1ax2 <- gam(wantstostop ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  anova(stop.gam1ax, stop.gam1ax1, test="Chisq")
  anova(stop.gam1ax, stop.gam1ax2, test="Chisq")
  
# Relative vs. Absolute (Relative Coef)
  mean(predict(stop.gam1ax, newdata=varsetrr2, type="response") - predict(stop.gam1ax, newdata=varsetrr1, type="response"), na.rm=T)

  # Relative vs. Absolute (Absolute Coef)  
  mean(predict(stop.gam1ax, newdata=varsetar2, type="response") - predict(stop.gam1ax, newdata=varsetar1, type="response"), na.rm=T)

  ### CURRENT vs NEVER 

  varsetrr1 <- varset
  varsetrr1$sr <- wtd.quantile(varset$sr[cvnfilt==1], weights[cvnfilt==1], .25, na.rm=TRUE)
  varsetrr2 <- varset
  varsetrr2$sr <- wtd.quantile(varset$sr[cvnfilt==1], weights[cvnfilt==1], .75, na.rm=TRUE)
  
  varsetar1 <- varset
  varsetar1$smokercancer <- wtd.quantile(varset$smokercancer[cvnfilt==1], weights[cvnfilt==1], .25, na.rm=TRUE)
  varsetar2 <- varset
  varsetar2$smokercancer <- wtd.quantile(varset$smokercancer[cvnfilt==1], weights[cvnfilt==1], .75, na.rm=TRUE)
  
  varsetat1 <- varset
  varsetat1$smokerdif <- wtd.quantile(varset$smokerdif[cvnfilt==1], weights[cvnfilt==1], .25, na.rm=TRUE)
  varsetat2 <- varset
  varsetat2$smokerdif <- wtd.quantile(varset$smokerdif[cvnfilt==1], weights[cvnfilt==1], .75, na.rm=TRUE)
  
  smoking.gam3 <- gam(currentvsnever ~ s(smokercancer) + s(sr), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam31 <- gam(currentvsnever ~ s(smokercancer), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam32 <- gam(currentvsnever ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  anova(smoking.gam3, smoking.gam31, test="Chisq")
  anova(smoking.gam3, smoking.gam32, test="Chisq")
  
  mean(predict(smoking.gam3, newdata=varsetrr1, type="response") - predict(smoking.gam3, newdata=varsetrr2, type="response"), na.rm=T)
  mean(predict(smoking.gam3, newdata=varsetar1, type="response") - predict(smoking.gam3, newdata=varsetar2, type="response"), na.rm=T)
  
  smoking.gam3a <- gam(currentvsnever ~ s(smokerdif) + s(sr),  subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam3a1 <- gam(currentvsnever ~ s(smokerdif), subset = filter==1, family=gaussian(), weights=weights)
  smoking.gam3a2 <- gam(currentvsnever ~ s(sr), subset = filter==1, family=gaussian(), weights=weights)
  
  anova(smoking.gam3a, smoking.gam3a1, test="Chisq")
  anova(smoking.gam3a, smoking.gam3a2, test="Chisq")
  
  mean(predict(smoking.gam3a, newdata=varsetrr1, type="response") - predict(smoking.gam3a, newdata=varsetrr2, type="response"), na.rm=T)
  mean(predict(smoking.gam3a, newdata=varsetat1, type="response") - predict(smoking.gam3a, newdata=varsetat2, type="response"), na.rm=T)

###################### PRODUCE MAIN ANALYSIS TABLES 1 2 AND 3 ##############################

  nssum <- sum(table(nonsmokercancer))
  ssum <- sum(table(smokercancer))
  sum(nonsmokercancer<=50 & !is.na(nonsmokercancer))/nssum
  
  inran <- function(a, b){
    nsout <- sum(weights[nonsmokercancer<=b & nonsmokercancer>=a & !is.na(nonsmokercancer)])/sum(weights[!is.na(nonsmokercancer)])
    sout <- sum(weights[smokercancer<=b & smokercancer>=a & !is.na(smokercancer)])/sum(weights[!is.na(smokercancer)])
    out <- c(nsout, sout)
    names(out) <- c(paste("Nonsmoker ", a, "--", b, sep=""), paste("Smoker ", a, "--", b, sep=""))
    out
  }
  
  inrancf <- function(a, b){
    nsout <- sum(weights[nonsmokercancer<=b & nonsmokercancer>=a & !is.na(nonsmokercancer) & !is.na(currentvsformer)])/sum(weights[!is.na(nonsmokercancer) & !is.na(currentvsformer)])
    sout <- sum(weights[smokercancer<=b & smokercancer>=a & !is.na(smokercancer) & !is.na(currentvsformer)])/sum(weights[!is.na(smokercancer) & !is.na(currentvsformer)])
    out <- c(nsout, sout)
    names(out) <- c(paste("Nonsmoker ", a, "--", b, sep=""), paste("Smoker ", a, "--", b, sep=""))
    out
  }

  inrancn <- function(a, b){
    nsout <- sum(weights[nonsmokercancer<=b & nonsmokercancer>=a & !is.na(nonsmokercancer) & !is.na(currentvsnever) & currentvsnever==0])/sum(weights[!is.na(nonsmokercancer) & !is.na(currentvsnever) & currentvsnever==0])
    sout <- sum(weights[smokercancer<=b & smokercancer>=a & !is.na(smokercancer) & !is.na(currentvsnever) & currentvsnever==0])/sum(weights[!is.na(smokercancer) & !is.na(currentvsnever) & currentvsnever==0])
    out <- c(nsout, sout)
    names(out) <- c(paste("Nonsmoker ", a, "--", b, sep=""), paste("Smoker ", a, "--", b, sep=""))
    out
  }

  first <- c(0,51,101,151,201,251,301,351,401,451,500,501,551,601,651,701,751,801,851,901,951)
  second <- c(50,100,150,200,250,300,350,400,450,499,500,550,600,650,700,750,800,850,900,950,1000)
  
  bound <- inran(first[1], second[1])
  boundcvf <- inrancf(first[1], second[1])
  boundcvn <- inrancn(first[1], second[1])
  bnames <- "From 0 to 50"
  for(i in 2:length(second)){
    bound <- rbind(bound, inran(first[i], second[i]))
    boundcvf <- rbind(boundcvf, inrancf(first[i], second[i]))
    boundcvn <- rbind(boundcvn, inrancn(first[i], second[i]))
    bnames <- c(bnames, paste("From", first[i], "to", second[i]))
  }
  rownames(bound) <- bnames
  colnames(bound) <- c("Nonsmoker", "Smoker")
  rownames(boundcvf) <- bnames
  colnames(boundcvf) <- c("Nonsmoker", "Smoker")
  rownames(boundcvn) <- bnames
  colnames(boundcvn) <- c("Nonsmoker", "Smoker")
  
  newbound <- round(bound*100, digits=2)

  write.csv(bound, "percentages.csv")
  
  ##########################  TABLE ONE    
  
  #Current & Former Smokers 
    write.csv(boundcvf, "percentagescvf.csv")
  
  #Never Smokers 
    write.csv(boundcvn, "percentagescvn.csv")

  rebound <- paste(newbound, "%", sep="")
  mean(nonsmokercancer, na.rm=TRUE)
  
  wtd.mean(nonsmokercancer, weights, na.rm=TRUE)
  wtd.mean(smokercancer, weights, na.rm=TRUE)
  wtd.mean(nonsmokercancer[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], na.rm=TRUE)
  wtd.mean(smokercancer[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], na.rm=TRUE)
  wtd.mean(nonsmokercancer[!is.na(currentvsnever) & currentvsnever==0], weights[!is.na(currentvsnever) & currentvsnever==0], na.rm=TRUE)
  wtd.mean(smokercancer[!is.na(currentvsnever) & currentvsnever==0], weights[!is.na(currentvsnever) & currentvsnever==0], na.rm=TRUE)
  
  wtd.quantile(nonsmokercancer, weights, probs=.5, na.rm=TRUE)
  wtd.quantile(smokercancer, weights, probs=.5, na.rm=TRUE)
  wtd.quantile(nonsmokercancer[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], probs=.5, na.rm=TRUE)
  wtd.quantile(smokercancer[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], probs=.5, na.rm=TRUE)
  wtd.quantile(nonsmokercancer[!is.na(currentvsnever) & currentvsnever==0], weights[!is.na(currentvsnever) & currentvsnever==0], probs=.5, na.rm=TRUE)
  wtd.quantile(smokercancer[!is.na(currentvsnever) & currentvsnever==0], weights[!is.na(currentvsnever) & currentvsnever==0], probs=.5, na.rm=TRUE)
  
  sum(!is.na(nonsmokercancer) & !is.na(currentvsformer))
  sum(!is.na(smokercancer) & !is.na(currentvsformer))
  sum(!is.na(nonsmokercancer) & !is.na(currentvsnever) & currentvsnever==0)
  sum(!is.na(smokercancer) & !is.na(currentvsnever)  & currentvsnever==0)

  arran <- function(a, b){
    out <- sum(weights[smokerdif<=b & smokerdif>=a & !is.na(smokerdif)])/sum(weights[!is.na(smokerdif)])
    out
  }
  
  arrancf <- function(a, b){
    out <- sum(weights[smokerdif<=b & smokerdif>=a & !is.na(smokerdif) & !is.na(currentvsformer)])/sum(weights[!is.na(smokerdif) & !is.na(currentvsformer)])
    out
  }
  
  arrancn <- function(a, b){
    out <- sum(weights[smokerdif<=b & smokerdif>=a & !is.na(smokerdif) & !is.na(currentvsnever) & currentvsnever==0])/sum(weights[!is.na(smokerdif) & !is.na(currentvsnever) & currentvsnever==0])
    out
  }
  
  first2 <- c(-1000,0,1,51,101,151,201,251,301,351,401,451,500,501,551,601,651,701,751,801,851,901,951)
  second2 <- c(-1,0,50,100,150,200,250,300,350,400,450,499,500,550,600,650,700,750,800,850,900,950,1000)

  var1 <- arran(first2[1], second2[1])
  runsum <- var1
  lastsum <- runsum
  var1cf <- arrancf(first2[1], second2[1])
  runsumcf <- var1cf
  lastsumcf <- runsumcf
  var1cn <- arrancn(first2[1], second2[1])
  runsumcn <- var1cn
  lastsumcn <- runsumcn
  anames <- "From -1000 to -1"
  for(i in 2:length(second2)){
    ar <- arran(first2[i], second2[i])
    var1 <- c(var1, ar)
    lastsum <- lastsum+ar
    runsum <- c(runsum, lastsum)
    arcf <- arrancf(first2[i], second2[i])
    var1cf <- c(var1cf, arcf)
    lastsumcf <- lastsumcf+arcf
    runsumcf <- c(runsumcf, lastsumcf)
    arcn <- arrancn(first2[i], second2[i])
    var1cn <- c(var1cn, arcn)
    lastsumcn <- lastsumcn+arcn
    runsumcn <- c(runsumcn, lastsumcn)
    anames <- c(anames, paste("From", first2[i], "to", second2[i]))
  }

  arpct <- cbind(var1, runsum)
  rownames(arpct) <- anames
  colnames(arpct) <- c("Valid percent", "Cumulative percent")
  arpctcf <- cbind(var1cf, runsumcf)
  rownames(arpctcf) <- anames
  colnames(arpctcf) <- c("Valid percent", "Cumulative percent")
  arpctcn <- cbind(var1cn, runsumcn)
  rownames(arpctcn) <- anames
  colnames(arpctcn) <- c("Valid percent", "Cumulative percent")
  
  write.csv(arpct, "attribriskpct.csv")
  
  ##########################  TABLE TWO  
  
  #Current and Former Smokers
  write.csv(arpctcf, "attribriskpctcf.csv")
  
  #Never Smokers 
  write.csv(arpctcn, "attribriskpctcn.csv")


  wtd.mean(smokerdif, weights, na.rm=TRUE)
  wtd.mean(smokerdif[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], na.rm=TRUE)
  wtd.mean(smokerdif[!is.na(currentvsnever) & currentvsnever==0], weights[!is.na(currentvsnever) & currentvsnever==0], na.rm=TRUE)
  
  wtd.quantile(smokerdif, weights, probs=.5, na.rm=TRUE)
  wtd.quantile(smokerdif[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], probs=.5, na.rm=TRUE)
  wtd.quantile(smokerdif[!is.na(currentvsnever)& currentvsnever==0], weights[!is.na(currentvsnever)& currentvsnever==0], probs=.5, na.rm=TRUE)
  
  sum(!is.na(smokerdif) & !is.na(currentvsformer))
  sum(!is.na(smokerdif) & !is.na(currentvsnever) & currentvsnever==0)
  
  rrran <- function(a, b){
    out <- sum(weights[rateincrease<b & rateincrease>=a & !is.na(rateincrease)])/sum(weights[!is.na(rateincrease)])
    out
  }
  
  rrrancf <- function(a, b){
    out <- sum(weights[rateincrease<b & rateincrease>=a & !is.na(rateincrease) & !is.na(currentvsformer)])/sum(weights[!is.na(rateincrease) & !is.na(currentvsformer)])
    out
  }

  rrrancn <- function(a, b){
    out <- sum(weights[rateincrease<b & rateincrease>=a & !is.na(rateincrease) & !is.na(currentvsnever) & currentvsnever==0])/sum(weights[!is.na(rateincrease) & !is.na(currentvsnever) & currentvsnever==0])
    out
  }
  
  first3 <- c(0,1,1.0001,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,30,50,80)
  second3 <- c(1,1.0001,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,30,50,80,1000.1)
  
  var1 <- rrran(first3[1], second3[1])
  runsum <- var1
  lastsum <- runsum
  var1cf <- rrrancf(first3[1], second3[1])
  runsumcf <- var1cf
  lastsumcf <- runsumcf
  var1cn <- rrrancn(first3[1], second3[1])
  runsumcn <- var1cn
  lastsumcn <- runsumcn
  anames <- "From 0 to .99"
  for(i in 2:length(second3)){
    rr <- rrran(first3[i], second3[i])
    var1 <- c(var1, rr)
    lastsum <- lastsum+rr
    runsum <- c(runsum, lastsum)
    rrcf <- rrrancf(first3[i], second3[i])
    var1cf <- c(var1cf, rrcf)
    lastsumcf <- lastsumcf+rrcf
    runsumcf <- c(runsumcf, lastsumcf)
    rrcn <- rrrancn(first3[i], second3[i])
    var1cn <- c(var1cn, rrcn)
    lastsumcn <- lastsumcn+rrcn
    runsumcn <- c(runsumcn, lastsumcn)
    anames <- c(anames, paste("From", first3[i], "to", second3[i]))
  }
  
  rrpct <- cbind(var1, runsum)
  rownames(rrpct) <- anames
  colnames(rrpct) <- c("Valid percent", "Cumulative percent")
  rrpctcf <- cbind(var1cf, runsumcf)
  rownames(rrpctcf) <- anames
  colnames(rrpctcf) <- c("Valid percent", "Cumulative percent")
  rrpctcn <- cbind(var1cn, runsumcn)
  rownames(rrpctcn) <- anames
  colnames(rrpctcn) <- c("Valid percent", "Cumulative percent")
  write.csv(rrpct, "relriskpct.csv")
  
  ##########################  TABLE THREE   
  
  #Current and  Former Smokers (FFRISP)
    write.csv(rrpctcf, "relriskpctcf.csv")
  #Current and Never Smokers (FFRISP)
    write.csv(rrpctcn, "relriskpctcn.csv")

  
  wtd.mean(rateincrease, weights, na.rm=TRUE)
  wtd.mean(rateincrease[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], na.rm=TRUE)
  wtd.mean(rateincrease[!is.na(currentvsnever)& currentvsnever==0], weights[!is.na(currentvsnever)& currentvsnever==0], na.rm=TRUE)
  
  wtd.quantile(rateincrease, weights, probs=.5, na.rm=TRUE)
  wtd.quantile(rateincrease[!is.na(currentvsformer)], weights[!is.na(currentvsformer)], probs=.5, na.rm=TRUE)
  wtd.quantile(rateincrease[!is.na(currentvsnever)& currentvsnever==0], weights[!is.na(currentvsnever)& currentvsnever==0], probs=.5, na.rm=TRUE)
  
  sum(!is.na(smokerdif) & !is.na(currentvsnever) & currentvsnever==0)

